import pandas as pd
# Remote location of the pickled dengue table.
# NOTE(review): unpickling can execute arbitrary code, so this load is only
# safe while the repository behind the URL is trusted.
linkData = (
    "https://github.com/SocialAnalytics-StrategicIntelligence/"
    "TableOperations/raw/main/dengue_ok.pkl"
)
dengue = pd.read_pickle(linkData)

# Column names, dtypes and non-null counts
dengue.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 501236 entries, 0 to 501235 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 departamento 501236 non-null object 1 provincia 501236 non-null object 2 distrito 501236 non-null object 3 ano 501236 non-null int64 4 semana 501236 non-null int64 5 sexo 501236 non-null object 6 edad 501236 non-null int64 7 enfermedad 501236 non-null category 8 case 501236 non-null int64 dtypes: category(1), int64(4), object(4) memory usage: 31.1+ MB
# Summary statistics of the numeric columns, rendered with five decimals
dengue.describe().apply(
    lambda column: column.map('{0:.5f}'.format)
)
| ano | semana | edad | case | |
|---|---|---|---|---|
| count | 501236.00000 | 501236.00000 | 501236.00000 | 501236.00000 |
| mean | 2014.77213 | 21.99838 | 28.96143 | 1.00000 |
| std | 6.14646 | 14.76658 | 18.15954 | 0.00000 |
| min | 2000.00000 | 1.00000 | 0.00000 | 1.00000 |
| 25% | 2011.00000 | 11.00000 | 15.00000 | 1.00000 |
| 50% | 2016.00000 | 18.00000 | 26.00000 | 1.00000 |
| 75% | 2020.00000 | 32.00000 | 41.00000 | 1.00000 |
| max | 2022.00000 | 53.00000 | 106.00000 | 1.00000 |
# Number of rows per value of 'enfermedad'
dengue['enfermedad'].value_counts()
enfermedad SIN_SEÑALES 443996 ALARMA 54981 GRAVE 2259 Name: count, dtype: int64
# String version of 'enfermedad' with a numeric prefix on each label
# (presumably so that alphabetical order follows severity).
dengue['enfermedad_text'] = dengue['enfermedad'].astype(str).replace(
    {
        'SIN_SEÑALES': '1_SIN_SEÑALES',
        'ALARMA': '2_ALARMA',
        'GRAVE': '3_GRAVE',
    }
)
# Number of rows per year, left in order of appearance (no sorting by count)
dengue['ano'].value_counts(sort=False)
ano 2000 5557 2001 23526 2002 8086 2003 3349 2004 9547 2005 5640 2006 4022 2007 6344 2008 12824 2009 13407 2010 16842 2011 28084 2012 28505 2013 13092 2015 35816 2014 17234 2016 25160 2017 68279 2018 4698 2019 15287 2020 47932 2021 44791 2022 63214 Name: count, dtype: int64
# Age brackets: [0, 15], (15, 50] and (50, 110]
binLimits = [0, 15, 50, 110]
theLabels = ["a_menor_a_16", "b_entre_16y50", "c_mayor_a_50"]

# Ordered categorical with one label per bracket; include_lowest keeps age 0
dengue["edad_grupos"] = pd.cut(
    dengue["edad"],
    bins=binLimits,
    labels=theLabels,
    include_lowest=True,
    ordered=True,
)
# Preview the first five rows, including the two new columns
dengue.head(n=5)
| departamento | provincia | distrito | ano | semana | sexo | edad | enfermedad | case | enfermedad_text | edad_grupos | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | HUANUCO | LEONCIO PRADO | LUYANDO | 2000 | 47 | M | 9 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | a_menor_a_16 |
| 1 | HUANUCO | LEONCIO PRADO | LUYANDO | 2000 | 40 | F | 18 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | b_entre_16y50 |
| 2 | HUANUCO | LEONCIO PRADO | JOSE CRESPO Y CASTILLO | 2000 | 48 | F | 32 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | b_entre_16y50 |
| 3 | HUANUCO | LEONCIO PRADO | JOSE CRESPO Y CASTILLO | 2000 | 37 | F | 40 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | b_entre_16y50 |
| 4 | HUANUCO | LEONCIO PRADO | MARIANO DAMASO BERAUN | 2000 | 42 | M | 16 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | b_entre_16y50 |
# Share of each severity level within every age group (each column sums to 1)
pd.crosstab(
    index=dengue['enfermedad_text'],
    columns=dengue['edad_grupos'],
    normalize='columns',
    dropna=False,
)
| edad_grupos | a_menor_a_16 | b_entre_16y50 | c_mayor_a_50 |
|---|---|---|---|
| enfermedad_text | |||
| 1_SIN_SEÑALES | 0.876868 | 0.890247 | 0.884549 |
| 2_ALARMA | 0.119010 | 0.105475 | 0.109204 |
| 3_GRAVE | 0.004122 | 0.004278 | 0.006248 |
# Same shares, now within every sex / age-group combination
pd.crosstab(
    index=dengue['enfermedad_text'],
    columns=[dengue['sexo'], dengue['edad_grupos']],
    normalize='columns',
    dropna=False,
)
| sexo | F | M | ||||
|---|---|---|---|---|---|---|
| edad_grupos | a_menor_a_16 | b_entre_16y50 | c_mayor_a_50 | a_menor_a_16 | b_entre_16y50 | c_mayor_a_50 |
| enfermedad_text | ||||||
| 1_SIN_SEÑALES | 0.875221 | 0.884646 | 0.881328 | 0.878431 | 0.897139 | 0.888295 |
| 2_ALARMA | 0.120614 | 0.110622 | 0.113616 | 0.117488 | 0.099142 | 0.104073 |
| 3_GRAVE | 0.004165 | 0.004731 | 0.005057 | 0.004081 | 0.003720 | 0.007633 |
Weekly
!pip install altair -U
!pip install "vegafusion-jupyter[embed]"
Requirement already satisfied: altair in /usr/local/lib/python3.10/dist-packages (5.4.1) Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from altair) (3.1.4) Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair) (4.19.2) Requirement already satisfied: narwhals>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from altair) (1.6.0) Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from altair) (24.1) Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.10/dist-packages (from altair) (4.12.2) Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (23.2.0) Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (2023.12.1) Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (0.35.1) Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (0.19.0) Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->altair) (2.1.5) Requirement already satisfied: vegafusion-jupyter[embed] in /usr/local/lib/python3.10/dist-packages (1.6.9) Requirement already satisfied: ipywidgets<9,>=7.0.0 in /usr/local/lib/python3.10/dist-packages (from vegafusion-jupyter[embed]) (7.7.1) Requirement already satisfied: altair>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from vegafusion-jupyter[embed]) (5.4.1) Requirement already satisfied: vegafusion==1.6.9 in /usr/local/lib/python3.10/dist-packages (from vegafusion-jupyter[embed]) (1.6.9) Requirement already satisfied: vegafusion-python-embed==1.6.9 in /usr/local/lib/python3.10/dist-packages (from vegafusion-jupyter[embed]) (1.6.9) Requirement already satisfied: 
vl-convert-python>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from vegafusion-jupyter[embed]) (1.6.1) Requirement already satisfied: pyarrow>=5 in /usr/local/lib/python3.10/dist-packages (from vegafusion==1.6.9->vegafusion-jupyter[embed]) (14.0.2) Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from vegafusion==1.6.9->vegafusion-jupyter[embed]) (2.0.3) Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from vegafusion==1.6.9->vegafusion-jupyter[embed]) (5.9.5) Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from vegafusion==1.6.9->vegafusion-jupyter[embed]) (3.20.3) Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (3.1.4) Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (4.19.2) Requirement already satisfied: narwhals>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (1.6.0) Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (24.1) Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (4.12.2) Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (5.5.6) Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.2.0) Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (5.7.1) Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from 
ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.6.7) Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (7.34.0) Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.0.11) Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.1.12) Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.3.3) Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (71.0.4) Requirement already satisfied: jedi>=0.16 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.19.1) Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.4.2) Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.7.5) Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.0.47) Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (2.16.1) Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.2.0) Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from 
ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.1.7) Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.9.0) Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion-jupyter[embed]) (23.2.0) Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion-jupyter[embed]) (2023.12.1) Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion-jupyter[embed]) (0.35.1) Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion-jupyter[embed]) (0.19.0) Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.10/dist-packages (from pyarrow>=5->vegafusion==1.6.9->vegafusion-jupyter[embed]) (1.25.2) Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.5.5) Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->altair>=4.2.0->vegafusion-jupyter[embed]) (2.1.5) Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion==1.6.9->vegafusion-jupyter[embed]) (2.8.2) Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion==1.6.9->vegafusion-jupyter[embed]) (2023.4) Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion==1.6.9->vegafusion-jupyter[embed]) (2024.1) Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from 
jedi>=0.16->ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.8.4) Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (24.0.1) Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (23.1.0) Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (5.7.2) Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (5.10.4) Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.5.4) Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.6.0) Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.8.3) Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.18.1) Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.20.0) Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from 
notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.1.0) Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.7.0) Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.2.13) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->vegafusion==1.6.9->vegafusion-jupyter[embed]) (1.16.0) Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.10/dist-packages (from jupyter-core>=4.6.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.2.2) Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.2.4) Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.9.4) Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.12.3) Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.1.0) Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.7.1) Requirement already satisfied: entrypoints>=0.2.2 in 
/usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.4) Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.3.0) Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.8.4) Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.10.0) Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.5.1) Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.3.0) Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (2.20.0) Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (21.2.0) Requirement already satisfied: jupyter-server<3,>=1.8 in /usr/local/lib/python3.10/dist-packages (from notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.24.0) Requirement already satisfied: cffi>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from 
argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.16.0) Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (2.5) Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.5.1) Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (2.22) Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.7.1) Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.8.0) Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.7) Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.3.1) Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from 
anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.2.2)
import altair as alt
# Switch Altair's data transformer to VegaFusion for the charts below
# (presumably needed because the table is much larger than Altair's
# default row limit — confirm).
alt.data_transformers.enable("vegafusion")
DataTransformerRegistry.enable('vegafusion')
# Keep only the rows for 2017; `dengue` is rebound, so every later step
# works on this subset.
dengue = dengue.loc[dengue['ano'] == 2017]
# Mean age per week, one series per severity level
alt_dengue = alt.Chart(dengue)
enc_dengue = alt_dengue.encode(
    alt.X('semana:O'),
    alt.Y('mean(edad):Q'),
    alt.Color('enfermedad_text:N'),
)
# Line for the mean with an error band layered on top
alt.layer(enc_dengue.mark_line(), enc_dengue.mark_errorband())
# Median age per week, one line per severity level, faceted by sex (rows)
# and age group (columns).
enc_dengue = alt_dengue.encode(
    x='semana:O',
    y='median(edad):Q',
    color='enfermedad_text:N',
    # 'ano' holds integer years. Typed as temporal (:T) an integer is read as
    # a timestamp in milliseconds and is displayed as a 1970 date, so the
    # year is shown as an ordinal value instead.
    tooltip=['median(edad)', 'ano:O'],
).interactive()
enc_dengue.mark_line().facet(
    row='sexo:N',
    column='edad_grupos:N',
)
# Median age per week by severity; the tooltip shows the median and the week
enc_dengue = alt_dengue.encode(
    alt.X('semana:O'),
    alt.Y('median(edad):Q'),
    alt.Color('enfermedad_text:N'),
    tooltip=['median(edad)', 'semana:O'],
).interactive()

# One panel per sex (rows) and age group (columns)
enc_dengue.mark_line().facet(
    row=alt.Row('sexo:N'),
    column=alt.Column('edad_grupos:N'),
)
# Weekly number of cases per severity level (linear scale)
enc_dengue = alt_dengue.encode(
    alt.X('semana:O'),
    y='sum(case):Q',
    color=alt.Color('enfermedad_text:N'),
    tooltip=['sum(case):Q', 'semana:O'],
).interactive()

# One panel per sex (rows) and age group (columns)
enc_dengue.mark_line().facet(
    row=alt.Row('sexo:N'),
    column=alt.Column('edad_grupos:N'),
)
# Weekly number of cases per severity level, on a logarithmic y axis
enc_dengue = alt_dengue.encode(
    alt.X('semana:O'),
    alt.Y('sum(case):Q').scale(type='log'),
    alt.Color('enfermedad_text:N'),
    tooltip=['sum(case):Q', 'semana:O'],
).interactive()

# One panel per sex (rows) and age group (columns)
enc_dengue.mark_line().facet(
    row=alt.Row('sexo:N'),
    column=alt.Column('edad_grupos:N'),
)
# Median age for every age group / week / sex / severity combination
# that actually occurs in the data (observed=True).
indexList = ['edad_grupos', 'semana', 'sexo', 'enfermedad_text']
aggregator = {'edad': ['median']}
LevelByWeek_medians = (
    dengue
    .groupby(by=indexList, observed=True)
    .aggregate(aggregator)
)
LevelByWeek_medians
| edad | ||||
|---|---|---|---|---|
| median | ||||
| edad_grupos | semana | sexo | enfermedad_text | |
| a_menor_a_16 | 1 | F | 1_SIN_SEÑALES | 9.0 |
| 2_ALARMA | 6.0 | |||
| M | 1_SIN_SEÑALES | 11.0 | ||
| 2_ALARMA | 12.5 | |||
| 3_GRAVE | 11.0 | |||
| ... | ... | ... | ... | ... |
| c_mayor_a_50 | 52 | F | 1_SIN_SEÑALES | 68.0 |
| 2_ALARMA | 63.0 | |||
| M | 1_SIN_SEÑALES | 59.5 | ||
| 2_ALARMA | 62.0 | |||
| 3_GRAVE | 69.0 |
726 rows × 1 columns
# Move sex and severity from the row index to the columns (long to wide)
LevelByWeek_medians.unstack(level=['sexo', 'enfermedad_text'])
| edad | |||||||
|---|---|---|---|---|---|---|---|
| median | |||||||
| sexo | F | M | |||||
| enfermedad_text | 1_SIN_SEÑALES | 2_ALARMA | 3_GRAVE | 1_SIN_SEÑALES | 2_ALARMA | 3_GRAVE | |
| edad_grupos | semana | ||||||
| a_menor_a_16 | 1 | 9.0 | 6.0 | NaN | 11.0 | 12.5 | 11.0 |
| 2 | 13.0 | 12.5 | NaN | 10.0 | 12.0 | 5.0 | |
| 3 | 9.0 | 11.0 | NaN | 9.0 | 11.0 | 4.0 | |
| 4 | 10.0 | 12.0 | NaN | 11.0 | 4.0 | NaN | |
| 5 | 9.0 | 7.0 | NaN | 11.0 | 12.0 | NaN | |
| ... | ... | ... | ... | ... | ... | ... | ... |
| c_mayor_a_50 | 48 | 63.0 | 57.0 | 61.0 | 56.0 | 64.0 | NaN |
| 49 | 65.0 | 65.0 | NaN | 64.0 | NaN | 81.0 | |
| 50 | 63.0 | NaN | NaN | 57.0 | 56.0 | NaN | |
| 51 | 58.0 | 75.0 | NaN | 59.0 | 73.0 | NaN | |
| 52 | 68.0 | 63.0 | NaN | 59.5 | 62.0 | 69.0 | |
156 rows × 6 columns
# Weekly case totals as bars, one column of panels per severity level,
# on a logarithmic y axis.
alt_dengue = alt.Chart(dengue)
enc_dengue = alt_dengue.encode(
    alt.X('semana:O'),
    alt.Y('sum(case):Q').scale(type='log'),
    alt.Column('enfermedad_text:N'),
)
enc_dengue.mark_bar()
# Median, mean, minimum and maximum age for every observed
# age group / week / sex / severity combination.
indexList = ['edad_grupos', 'semana', 'sexo', 'enfermedad_text']
aggregator = {'edad': ['median', 'mean', 'min', 'max']}
LevelByWeek_statsFull = (
    dengue
    .groupby(by=indexList, observed=True)
    .aggregate(aggregator)
)
LevelByWeek_statsFull
| edad | |||||||
|---|---|---|---|---|---|---|---|
| median | mean | min | max | ||||
| edad_grupos | semana | sexo | enfermedad_text | ||||
| a_menor_a_16 | 1 | F | 1_SIN_SEÑALES | 9.0 | 8.750000 | 1 | 15 |
| 2_ALARMA | 6.0 | 7.111111 | 2 | 14 | |||
| M | 1_SIN_SEÑALES | 11.0 | 9.684211 | 1 | 15 | ||
| 2_ALARMA | 12.5 | 10.125000 | 2 | 14 | |||
| 3_GRAVE | 11.0 | 11.000000 | 11 | 11 | |||
| ... | ... | ... | ... | ... | ... | ... | ... |
| c_mayor_a_50 | 52 | F | 1_SIN_SEÑALES | 68.0 | 66.375000 | 51 | 78 |
| 2_ALARMA | 63.0 | 62.666667 | 54 | 71 | |||
| M | 1_SIN_SEÑALES | 59.5 | 63.666667 | 51 | 87 | ||
| 2_ALARMA | 62.0 | 62.333333 | 57 | 68 | |||
| 3_GRAVE | 69.0 | 69.000000 | 69 | 69 | |||
726 rows × 4 columns
Mining location: let's use departamento and provincia.
# Total cases per week, department, province and severity level
indexList = ['semana', 'departamento', 'provincia', 'enfermedad_text']
aggregator = {'case': ['sum']}
ByWeekPlace = (
    dengue
    .groupby(by=indexList, observed=True)
    .aggregate(aggregator)
)
ByWeekPlace
| case | ||||
|---|---|---|---|---|
| sum | ||||
| semana | departamento | provincia | enfermedad_text | |
| 1 | AYACUCHO | HUANTA | 1_SIN_SEÑALES | 9 |
| 2_ALARMA | 2 | |||
| LA MAR | 1_SIN_SEÑALES | 5 | ||
| 2_ALARMA | 2 | |||
| CAJAMARCA | CONTUMAZA | 1_SIN_SEÑALES | 16 | |
| ... | ... | ... | ... | ... |
| 52 | PIURA | MORROPON | 1_SIN_SEÑALES | 1 |
| PIURA | 1_SIN_SEÑALES | 13 | ||
| SULLANA | 1_SIN_SEÑALES | 9 | ||
| UCAYALI | CORONEL PORTILLO | 1_SIN_SEÑALES | 23 | |
| 2_ALARMA | 3 |
2626 rows × 1 columns
# Long to wide: the innermost index level (severity) becomes the columns
ByWeekPlace.unstack(level=-1)
| case | |||||
|---|---|---|---|---|---|
| sum | |||||
| enfermedad_text | 1_SIN_SEÑALES | 2_ALARMA | 3_GRAVE | ||
| semana | departamento | provincia | |||
| 1 | AYACUCHO | HUANTA | 9.0 | 2.0 | NaN |
| LA MAR | 5.0 | 2.0 | NaN | ||
| CAJAMARCA | CONTUMAZA | 16.0 | NaN | NaN | |
| CUSCO | LA CONVENCION | 18.0 | 3.0 | NaN | |
| HUANUCO | LEONCIO PRADO | 1.0 | NaN | NaN | |
| ... | ... | ... | ... | ... | ... |
| 52 | MADRE DE DIOS | TAMBOPATA | 33.0 | 21.0 | 2.0 |
| PIURA | MORROPON | 1.0 | NaN | NaN | |
| PIURA | 13.0 | NaN | NaN | ||
| SULLANA | 9.0 | NaN | NaN | ||
| UCAYALI | CORONEL PORTILLO | 23.0 | 3.0 | NaN | |
1795 rows × 3 columns
# Wide table in which combinations with no cases are 0 instead of NaN
ByWeekPlace_wide = ByWeekPlace.unstack(level=-1).fillna(value=0)
ByWeekPlace_wide
| case | |||||
|---|---|---|---|---|---|
| sum | |||||
| enfermedad_text | 1_SIN_SEÑALES | 2_ALARMA | 3_GRAVE | ||
| semana | departamento | provincia | |||
| 1 | AYACUCHO | HUANTA | 9.0 | 2.0 | 0.0 |
| LA MAR | 5.0 | 2.0 | 0.0 | ||
| CAJAMARCA | CONTUMAZA | 16.0 | 0.0 | 0.0 | |
| CUSCO | LA CONVENCION | 18.0 | 3.0 | 0.0 | |
| HUANUCO | LEONCIO PRADO | 1.0 | 0.0 | 0.0 | |
| ... | ... | ... | ... | ... | ... |
| 52 | MADRE DE DIOS | TAMBOPATA | 33.0 | 21.0 | 2.0 |
| PIURA | MORROPON | 1.0 | 0.0 | 0.0 | |
| PIURA | 13.0 | 0.0 | 0.0 | ||
| SULLANA | 9.0 | 0.0 | 0.0 | ||
| UCAYALI | CORONEL PORTILLO | 23.0 | 3.0 | 0.0 | |
1795 rows × 3 columns
# Row totals: all cases per week / department / province
sumCases = ByWeekPlace_wide.sum(axis='columns')
sumCases
semana departamento provincia
1 AYACUCHO HUANTA 11.0
LA MAR 7.0
CAJAMARCA CONTUMAZA 16.0
CUSCO LA CONVENCION 21.0
HUANUCO LEONCIO PRADO 1.0
...
52 MADRE DE DIOS TAMBOPATA 56.0
PIURA MORROPON 1.0
PIURA 13.0
SULLANA 9.0
UCAYALI CORONEL PORTILLO 26.0
Length: 1795, dtype: float64
# Fraction of each row's cases that belong to the '2_ALARMA' level
shareAlarma = (
    ByWeekPlace_wide[('case', 'sum', '2_ALARMA')]
    .div(sumCases)
    .rename('shareAlarma')
)
shareAlarma
semana departamento provincia
1 AYACUCHO HUANTA 0.181818
LA MAR 0.285714
CAJAMARCA CONTUMAZA 0.000000
CUSCO LA CONVENCION 0.142857
HUANUCO LEONCIO PRADO 0.000000
...
52 MADRE DE DIOS TAMBOPATA 0.375000
PIURA MORROPON 0.000000
PIURA 0.000000
SULLANA 0.000000
UCAYALI CORONEL PORTILLO 0.115385
Name: shareAlarma, Length: 1795, dtype: float64
# Turn the index levels (week, department, province) into regular columns
shareAlarma = shareAlarma.reset_index(drop=False)
shareAlarma
| semana | departamento | provincia | shareAlarma | |
|---|---|---|---|---|
| 0 | 1 | AYACUCHO | HUANTA | 0.181818 |
| 1 | 1 | AYACUCHO | LA MAR | 0.285714 |
| 2 | 1 | CAJAMARCA | CONTUMAZA | 0.000000 |
| 3 | 1 | CUSCO | LA CONVENCION | 0.142857 |
| 4 | 1 | HUANUCO | LEONCIO PRADO | 0.000000 |
| ... | ... | ... | ... | ... |
| 1790 | 52 | MADRE DE DIOS | TAMBOPATA | 0.375000 |
| 1791 | 52 | PIURA | MORROPON | 0.000000 |
| 1792 | 52 | PIURA | PIURA | 0.000000 |
| 1793 | 52 | PIURA | SULLANA | 0.000000 |
| 1794 | 52 | UCAYALI | CORONEL PORTILLO | 0.115385 |
1795 rows × 4 columns
# Row label of the province with the highest alarm share
# within each week / department pair.
where = (
    shareAlarma
    .groupby(by=['semana', 'departamento'])['shareAlarma']
    .idxmax()
)

# Pull those rows out and renumber them from zero
worst_prov_week = shareAlarma.loc[where].reset_index(drop=True)
worst_prov_week
| semana | departamento | provincia | shareAlarma | |
|---|---|---|---|---|
| 0 | 1 | AYACUCHO | LA MAR | 0.285714 |
| 1 | 1 | CAJAMARCA | CONTUMAZA | 0.000000 |
| 2 | 1 | CUSCO | LA CONVENCION | 0.142857 |
| 3 | 1 | HUANUCO | LEONCIO PRADO | 0.000000 |
| 4 | 1 | ICA | PALPA | 0.000000 |
| ... | ... | ... | ... | ... |
| 752 | 52 | LIMA | LIMA | 1.000000 |
| 753 | 52 | LORETO | REQUENA | 1.000000 |
| 754 | 52 | MADRE DE DIOS | TAMBOPATA | 0.375000 |
| 755 | 52 | PIURA | MORROPON | 0.000000 |
| 756 | 52 | UCAYALI | CORONEL PORTILLO | 0.115385 |
757 rows × 4 columns
# Distribution of the selected alarm shares
worst_prov_week['shareAlarma'].describe()
count 757.000000 mean 0.240214 std 0.330670 min 0.000000 25% 0.000000 50% 0.062500 75% 0.375000 max 1.000000 Name: shareAlarma, dtype: float64
# Department / province pairs of the selected rows whose alarm share is
# positive, renumbered from zero.
worst_ProvWeek_alarma = (
    worst_prov_week
    .loc[worst_prov_week['shareAlarma'] > 0, ['departamento', 'provincia']]
    .reset_index(drop=True)
)
worst_ProvWeek_alarma
| departamento | provincia | |
|---|---|---|
| 0 | AYACUCHO | LA MAR |
| 1 | CUSCO | LA CONVENCION |
| 2 | LA LIBERTAD | CHEPEN |
| 3 | LORETO | MAYNAS |
| 4 | MADRE DE DIOS | TAMBOPATA |
| ... | ... | ... |
| 449 | LA LIBERTAD | TRUJILLO |
| 450 | LIMA | LIMA |
| 451 | LORETO | REQUENA |
| 452 | MADRE DE DIOS | TAMBOPATA |
| 453 | UCAYALI | CORONEL PORTILLO |
454 rows × 2 columns
# How many times each department / province pair appears in the list
indexList = ['departamento', 'provincia']
aggregator = {'provincia': ['count']}
worst_ProvWeek_alarma_Frequency = (
    worst_ProvWeek_alarma
    .groupby(by=indexList, observed=True)
    .aggregate(aggregator)
)
worst_ProvWeek_alarma_Frequency
| provincia | ||
|---|---|---|
| count | ||
| departamento | provincia | |
| AMAZONAS | UTCUBAMBA | 2 |
| ANCASH | CASMA | 1 |
| SANTA | 15 | |
| AYACUCHO | HUAMANGA | 1 |
| HUANTA | 17 | |
| ... | ... | ... |
| TUMBES | TUMBES | 12 |
| ZARUMILLA | 5 | |
| UCAYALI | ATALAYA | 18 |
| CORONEL PORTILLO | 17 | |
| PADRE ABAD | 6 |
62 rows × 1 columns
# Final look: flatten the column header, keep the provinces that appear
# more than twice, and turn the index back into regular columns.
worst_ProvWeek_alarma_Frequency.columns = ['weeksAffected']
worst_ProvWeek_alarma_Frequency = (
    worst_ProvWeek_alarma_Frequency
    .loc[worst_ProvWeek_alarma_Frequency['weeksAffected'] > 2]
    .reset_index()
)
worst_ProvWeek_alarma_Frequency
| departamento | provincia | weeksAffected | |
|---|---|---|---|
| 0 | ANCASH | SANTA | 15 |
| 1 | AYACUCHO | HUANTA | 17 |
| 2 | AYACUCHO | LA MAR | 21 |
| 3 | CUSCO | LA CONVENCION | 27 |
| 4 | HUANUCO | LEONCIO PRADO | 12 |
| 5 | ICA | ICA | 7 |
| 6 | ICA | NAZCA | 3 |
| 7 | ICA | PALPA | 15 |
| 8 | JUNIN | CHANCHAMAYO | 12 |
| 9 | JUNIN | SATIPO | 6 |
| 10 | LA LIBERTAD | ASCOPE | 4 |
| 11 | LA LIBERTAD | CHEPEN | 11 |
| 12 | LA LIBERTAD | PACASMAYO | 6 |
| 13 | LA LIBERTAD | TRUJILLO | 13 |
| 14 | LA LIBERTAD | VIRU | 3 |
| 15 | LAMBAYEQUE | CHICLAYO | 16 |
| 16 | LAMBAYEQUE | LAMBAYEQUE | 4 |
| 17 | LIMA | LIMA | 11 |
| 18 | LORETO | DATEM DEL MARAÑON | 4 |
| 19 | LORETO | LORETO | 5 |
| 20 | LORETO | MAYNAS | 32 |
| 21 | LORETO | REQUENA | 5 |
| 22 | MADRE DE DIOS | MANU | 3 |
| 23 | MADRE DE DIOS | TAHUAMANU | 3 |
| 24 | MADRE DE DIOS | TAMBOPATA | 32 |
| 25 | PIURA | HUANCABAMBA | 5 |
| 26 | PIURA | PAITA | 15 |
| 27 | PIURA | PIURA | 3 |
| 28 | PIURA | SECHURA | 11 |
| 29 | PIURA | SULLANA | 8 |
| 30 | PIURA | TALARA | 5 |
| 31 | SAN MARTIN | HUALLAGA | 3 |
| 32 | SAN MARTIN | MARISCAL CACERES | 10 |
| 33 | SAN MARTIN | MOYOBAMBA | 6 |
| 34 | SAN MARTIN | SAN MARTIN | 6 |
| 35 | SAN MARTIN | TOCACHE | 4 |
| 36 | TUMBES | CONTRALMIRANTE VILLAR | 5 |
| 37 | TUMBES | TUMBES | 12 |
| 38 | TUMBES | ZARUMILLA | 5 |
| 39 | UCAYALI | ATALAYA | 18 |
| 40 | UCAYALI | CORONEL PORTILLO | 17 |
| 41 | UCAYALI | PADRE ABAD | 6 |
# Text grid: provinces on x, departments on y; each cell prints the number
# of weeks, with the font size tied to the same value.
alt_worstProv = alt.Chart(worst_ProvWeek_alarma_Frequency)
enc_worstProv = alt_worstProv.encode(
    alt.X('provincia:N'),
    alt.Y('departamento:N'),
    alt.Text('weeksAffected:O'),
    alt.Size('weeksAffected:O'),
)
enc_worstProv.mark_text()
Now by departamento
# Total cases per week, department and severity level
indexList = ['semana', 'departamento', 'enfermedad_text']
aggregator = {'case': ['sum']}
ByWeekDepa = dengue.groupby(by=indexList, observed=True).aggregate(aggregator)

# Wide layout (one column per severity level), missing combinations as 0
ByWeekDepa_wide = ByWeekDepa.unstack().fillna(0)

# Share of '2_ALARMA' cases in each week / department, as a flat table
ByWeekDepaAlarm = (
    ByWeekDepa_wide[('case', 'sum', '2_ALARMA')]
    .div(ByWeekDepa_wide.sum(axis='columns'))
    .rename('alarmShare')
    .reset_index()
)
ByWeekDepaAlarm
| semana | departamento | alarmShare | |
|---|---|---|---|
| 0 | 1 | AYACUCHO | 0.222222 |
| 1 | 1 | CAJAMARCA | 0.000000 |
| 2 | 1 | CUSCO | 0.142857 |
| 3 | 1 | HUANUCO | 0.000000 |
| 4 | 1 | ICA | 0.000000 |
| ... | ... | ... | ... |
| 752 | 52 | LIMA | 1.000000 |
| 753 | 52 | LORETO | 0.478261 |
| 754 | 52 | MADRE DE DIOS | 0.375000 |
| 755 | 52 | PIURA | 0.000000 |
| 756 | 52 | UCAYALI | 0.115385 |
757 rows × 3 columns
# Summary statistics of the numeric columns (week number and alarm share)
ByWeekDepaAlarm.describe()
| semana | alarmShare | |
|---|---|---|
| count | 757.000000 | 757.000000 |
| mean | 25.394980 | 0.122590 |
| std | 14.666437 | 0.184344 |
| min | 1.000000 | 0.000000 |
| 25% | 13.000000 | 0.000000 |
| 50% | 24.000000 | 0.040000 |
| 75% | 38.000000 | 0.166667 |
| max | 52.000000 | 1.000000 |
# Keep only the week / department rows with a positive alarm share
ByWeekDepaAlarm_focus = ByWeekDepaAlarm.loc[ByWeekDepaAlarm['alarmShare'] > 0]
ByWeekDepaAlarm_focus.describe()
| semana | alarmShare | |
|---|---|---|
| count | 454.000000 | 454.000000 |
| mean | 22.991189 | 0.204407 |
| std | 14.144942 | 0.199880 |
| min | 1.000000 | 0.003367 |
| 25% | 12.000000 | 0.058824 |
| 50% | 21.000000 | 0.142857 |
| 75% | 34.000000 | 0.285714 |
| max | 52.000000 | 1.000000 |
# Bin the alarm shares into four ordered levels, with upper limits
# 0.10, 0.25, 0.50 and 1 (each bin includes its upper limit).
edges = [-1, .10, .25, .5, 1]
theLabels = ["a.below10%", "b.11-25%", "c.26-50%", "d.above50%"]

# ByWeekDepaAlarm_focus comes from boolean filtering of another frame; take
# an explicit copy so that adding a column neither raises
# SettingWithCopyWarning nor depends on view-versus-copy behaviour.
ByWeekDepaAlarm_focus = ByWeekDepaAlarm_focus.copy()
ByWeekDepaAlarm_focus["alarmLevels"] = pd.cut(
    ByWeekDepaAlarm_focus['alarmShare'],
    include_lowest=True,
    bins=edges,
    labels=theLabels,
    ordered=True,
)
##
ByWeekDepaAlarm_focus.head()
<ipython-input-37-bd957ecc8bae>:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy ByWeekDepaAlarm_focus.loc[:,"alarmLevels"]=pd.cut(ByWeekDepaAlarm_focus['alarmShare'],
| semana | departamento | alarmShare | alarmLevels | |
|---|---|---|---|---|
| 0 | 1 | AYACUCHO | 0.222222 | b.11-25% |
| 2 | 1 | CUSCO | 0.142857 | b.11-25% |
| 6 | 1 | LA LIBERTAD | 1.000000 | d.above50% |
| 7 | 1 | LORETO | 0.153846 | b.11-25% |
| 8 | 1 | MADRE DE DIOS | 0.250000 | b.11-25% |
# Shared base: weeks on x, departments on y ordered by their highest
# alarm share (descending).
alt_WorstDepa = alt.Chart(ByWeekDepaAlarm_focus).encode(
    alt.X('semana:O'),
    alt.Y(
        'departamento:N',
        sort=alt.EncodingSortField(
            field='alarmShare', op='max', order='descending'
        ),
    ),
)

# Layer 1: rectangles coloured by alarm level
enc1_WorstDepa = alt_WorstDepa.encode(
    color=alt.Color(
        'alarmLevels:O',
        scale=alt.Scale(scheme="lightgreyred", reverse=False),
    )
)
enc1_WorstDepa.mark_rect()

# Layer 2: the share as text, visible only where it is at least 0.3
enc2_WorstDepa = alt_WorstDepa.encode(
    text=alt.Text('alarmShare:Q', format=".1f"),
    opacity=alt.condition(
        'datum.alarmShare >= 0.3', alt.value(1), alt.value(0)
    ),
)
enc2_WorstDepa.mark_text(fontStyle='bold')

# Heatmap with the text labels on top
alt.layer(enc1_WorstDepa.mark_rect(), enc2_WorstDepa.mark_text())